#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <cuda_runtime.h>

//Number of threads per block passed to the kernel launch in bubble_sort
const int ntpb = 512;

//Populates the first `size` slots of `arr` with pseudo-random values.
//  arr  - destination buffer holding at least `size` ints
//  size - number of slots to fill; every value written lies in [0, size - 1]
//Values come from rand(), so the sequence depends on the current srand() seed.
void fillArray(int *arr, int size)
{
  int slot = 0;

  //Walk the buffer front to back, drawing one random value per slot
  while (slot < size)
  {
    arr[slot] = rand() % size;
    ++slot;
  }
}

//Verifies ascending order of `arr` and prints the result.
//  arr  - array to inspect
//  size - number of elements in `arr`
//Counts every adjacent pair whose left element is greater than its right
//element and writes that count, followed by a newline, to std::cout.
//A count of 0 means the array is sorted in non-decreasing order.
void check(int *arr, int size)
{
  int outOfOrder = 0;

  //Look at each element together with its predecessor
  for (int right = 1; right < size; ++right)
  {
    outOfOrder += (arr[right - 1] > arr[right]) ? 1 : 0;
  }

  //Print # of errors
  std::cout << outOfOrder << std::endl;
}

//One phase of odd-even transposition sort: each thread compares one pair of
//neighbouring elements and swaps them when they are out of order.
//  a    - device array of `size` ints, modified in place
//  flag - phase number; an even value compares pairs (0,1), (2,3), ...
//         and an odd value compares pairs (1,2), (3,4), ...
//  size - number of elements in `a`
//Expects a 1D grid of 1D blocks with at least size / 2 threads in total.
//Surplus threads exit without touching memory, so the grid may be rounded up.
//Pairs handled within a single launch never overlap, so no synchronisation
//between threads is needed inside a phase.
__global__ void oddeven_bubble(int *a, int flag, int size)
{
  int index = blockIdx.x * blockDim.x + threadIdx.x;

  //No pair can start at or after element 2 * (size / 2); rejecting these
  //threads first also keeps 2 * index from overflowing int
  if (index >= size / 2)
    return;

  //Odd phases are shifted one element to the right of even phases
  int left = index * 2 + ((flag % 2 != 0) ? 1 : 0);
  int right = left + 1;

  //In an odd phase the last candidate pair may stick out past the array end
  if (right >= size)
    return;

  int lo = a[left];
  int hi = a[right];
  if (lo > hi)
  {
    a[left] = hi;
    a[right] = lo;
  }
}

//Reports a failed CUDA runtime call on std::cerr.
//Returns true when `status` is cudaSuccess, false otherwise.
static bool reportCudaError(cudaError_t status, const char *what)
{
  if (status == cudaSuccess)
    return true;

  std::cerr << "CUDA error during " << what << ": "
            << cudaGetErrorString(status) << std::endl;
  return false;
}

//Host function, also acts as a wrapper.
//Sorts `a` into ascending order on the GPU using odd-even transposition sort.
//  a    - host array of `size` ints; overwritten with the sorted result
//  size - number of elements in `a`
//If any CUDA call fails, the error is printed to std::cerr and `a` is left
//with its original contents. After device work the device is reset, which
//releases every allocation made on the current device by this process.
__host__ void bubble_sort(int *a, int size)
{
  //Zero or one element is already sorted; no device work is required
  if (a == nullptr || size < 2)
    return;

  const size_t bytes = size * sizeof(int);

  //Allocate memory
  int *d_a = nullptr;
  if (!reportCudaError(cudaMalloc((void **)&d_a, bytes), "cudaMalloc"))
    return;

  //Copy to device
  bool ok = reportCudaError(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice),
                            "copy to device");

  //Each thread handles one pair, so a block covers 2 * ntpb elements
  const int elementsPerBlock = 2 * ntpb;
  const int nb = (size + elementsPerBlock - 1) / elementsPerBlock;

  //Launch on device. Odd-even transposition sort needs `size` alternating
  //phases to guarantee a sorted result (size - 1 phases leave e.g. {3,2,1}
  //as {2,1,3}). Launches on the same stream execute in issue order, so no
  //host-side synchronisation is needed between phases.
  for (int phase = 0; ok && phase < size; phase++)
  {
    oddeven_bubble<<<nb, ntpb>>>(d_a, phase, size);
    ok = reportCudaError(cudaGetLastError(), "kernel launch");
  }

  //Sync device; this also surfaces errors raised while the kernels ran
  ok = ok && reportCudaError(cudaDeviceSynchronize(), "kernel execution");

  //Copy back to CPU memory space only when the whole sort succeeded
  ok = ok && reportCudaError(cudaMemcpy(a, d_a, bytes, cudaMemcpyDeviceToHost),
                             "copy to host");

  //Free memory on the device & reset
  reportCudaError(cudaFree(d_a), "cudaFree");
  reportCudaError(cudaDeviceReset(), "cudaDeviceReset");
}

//Program entry point.
//Usage: <program> <array size>
//Fills an array of the requested size with random values, sorts it on the
//GPU and prints how many adjacent pairs are still out of order.
//Returns 0 on a completed run and 1 when the size argument is missing or
//is not a positive integer.
int main(int argc, char *argv[])
{
  //The array size is a required argument
  if (argc < 2)
  {
    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "program")
              << " <array size>" << std::endl;
    return 1;
  }

  //Get the size of the array; atoi yields 0 for non-numeric input,
  //so anything below 1 is rejected
  int n = std::atoi(argv[1]);
  if (n <= 0)
  {
    std::cerr << "Array size must be a positive integer" << std::endl;
    return 1;
  }

  //Create one array of size n and allocate memory for it
  int *bubbleArray = new int[n];

  //Fill the array with randomly generated numbers between 0 & n
  fillArray(bubbleArray, n);

  //Sort using bubble
  bubble_sort(bubbleArray, n);

  //Check
  std::cout << "Errors found: ";
  check(bubbleArray, n);

  std::cout << "Sort is complete!" << std::endl;

  //Deallocate the array
  delete[] bubbleArray;

  return 0;
}